Introduction

This is the online supplementary to the paper “Traditional Chinese Medicine: A Bayesian network model of public awareness, usage determinants, and perception of scientific support in Austria” by Eigenschink, Bellach, Leonard, Dablander, Maier, Dablander, and Sitte. It includes the code and data to fully reproduce the results.

Data Preparation

We load the data, which holds 1382 participants.

library('dplyr')
library('readr')
library('tidyr')
library('kableExtra')

dat_raw <- read_delim('Data/dat.csv', delim = ';', skip = 1, na = c('', 'NA', -9))

dat1 <- dat_raw %>% 
  mutate(
    malus = as.numeric(gsub(',', '.', malus)),
    age = as.numeric(age),
    age = ifelse(age == 1994, 2020 - 1994, age),
    age = ifelse(age == '60 Jahre', 60, age),
    age = ifelse(age == 'Sechzig', 60, age),
    age = ifelse(age == '50 plus', 50, age),
  )

dim(dat1)
## [1] 1382   41

Data Exclusion

We select only the relevant variables.

relevant_variables <- c(
  'obtained', 'gender', 'age', 'education', 'employement',
  'income', 'disease', 'tcm_known', 'tcm_use', 'tcm_frequency', 'tcm_science', 'tcm_trust',
  'med_money', 'tcm_money', 'tcm_moneyrel', 'globuli_use', 'globuli_pass', 'vax_use',
  'vax_fresh', 'needle_use', 'needle_frequency', 'type'
)

dat_relevant <- select(dat1, all_of(relevant_variables))

There are 9 participants who had NAs for all relevant variables. We remove these participants.

index_all_na <- which(apply(select(dat_relevant, -type), 1, function(x) all(is.na(x))))
length(index_all_na)
## [1] 9
dat2 <- dat1[-index_all_na, ]

We also exclude those that indicated NA in some of the demographic variables, including education, employement, and income. This excluded 44 participants.

demographics <- c('gender', 'age', 'education', 'employement', 'income')

dat3 <- dat2 %>% 
  drop_na(demographics)

nrow(dat2) - nrow(dat3)
## [1] 44

For simplicity, we remove anybody who indicates “other” for gender (4 observations). Similarly, we remove somebody who indicated “-” as his/her age. This removes 4 participants.

dat4 <- filter(dat3, age != '-', gender != 3)
nrow(dat3) - nrow(dat4)
## [1] 4

Finally, we remove participants that have answered the online survey unreasonably quickly. We do this by removing participants with a malus score of 2 or higher (or those that had NA). This excludes 7 participants.

dat5 <- filter(dat4, !is.na(malus), !(type == 2 & malus > 2))
nrow(dat4) - nrow(dat5)
## [1] 7

We remove those particicpants that have type = ‘street’ yet have no obtained = 1.

dat <- dat5 %>% filter(!(type == 1 & obtained != 1))

nrow(dat5) - nrow(dat)
## [1] 11

In total, we have excluded 75 participants.

nrow(dat_raw) - nrow(dat)
## [1] 75

Descriptives

In this section, we will visualize and describe the sample. Here are some descriptive statistics we report in the main text.

# Get age and education statistics
dat %>%
  group_by(gender) %>%
  summarize(
    n = n(),
    age_m = mean(age),
    age_sd = sd(age),
    edu_norm = mean(education >= 6)
  )
## # A tibble: 2 x 5
##   gender     n age_m age_sd edu_norm
##    <dbl> <int> <dbl>  <dbl>    <dbl>
## 1      1   901  41.7   15.6    0.422
## 2      2   406  40.5   16.9    0.394
# Get income statistics
dat %>%
  filter(income != 12) %>% 
  group_by(gender) %>%
  summarize(
    n = n(),
    inc_norm = mean(income >= 7)
  )
## # A tibble: 2 x 3
##   gender     n inc_norm
##    <dbl> <int>    <dbl>
## 1      1   760    0.321
## 2      2   343    0.472
# Get disease statistics
dat %>%
  filter(!is.na(disease)) %>% 
  group_by(gender) %>%
  summarize(
    n = n(),
    disease = mean(2 - disease)
  )
## # A tibble: 2 x 3
##   gender     n disease
##    <dbl> <int>   <dbl>
## 1      1   900   0.453
## 2      2   406   0.387
# 'Gesamtzeit Fragebogen' according to the codebook
dat %>%
  group_by(type) %>% 
  summarize(
    time = median(time_total)
  )
## # A tibble: 2 x 2
##    type  time
##   <dbl> <dbl>
## 1     1   189
## 2     2   212

Demographic Variables

We first visualise the age, education, and income distribution across gender.

library('ggplot2')
library('gridExtra')
library('RColorBrewer')

# Easy to change colours
cols <- brewer.pal(3, 'Set1')[-2]
cols <- rev(ggsci::pal_jco(alpha = 0.60)(2))

custom_theme <- theme_minimal() +
  theme(
    legend.position = 'top',
    axis.ticks.y = element_blank(),
    panel.border = element_blank(),
    panel.spacing.x = unit(1, 'lines'),
    panel.spacing.y = unit(1, 'lines'),
    strip.text.x = element_text(size = 14),
    strip.text.y = element_text(size = 14),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 14),
    axis.text = element_text(size = 10),
    axis.title = element_text(size = 12),
    plot.title = element_text(hjust = 0.5, size = 18),
    plot.subtitle = element_text(hjust = 0.50, size = 14),
  )

remove_grid <- theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank()
)

education_labels <- c(
  'Elementary school',
  'Secondary school (GSCE)',
  'Apprenticeship',
  'Further education (A-level)',
  'Special certificate*',
  'Bachelor\'s',
  'Master\'s',
  'PhD'
)

income_labels <- c(
  'No income', '< 250', '250 - 500', '500 - 1000',
   '1000 - 1500', '1500 - 2000', '2000 - 2500',
  '2500 - 3000',  '3000 - 3500', '3500 - 4000',
  '> 4000', 'No answer'
)

income_labels <- c(
  'No income', '< 250€', '250€ - < 500€', '500€ - < 1000€',
   '1000€ - < 1500€', '1500€ - < 2000€', '2000€ - < 2500€',
  '2500€ - < 3000€',  '3000€ - < 3500€', '3500€ - < 4000€',
  '>= 4000€', 'No answer'
)

desc <- dat %>% 
  select(gender, age, education, income) %>% 
  pivot_longer(-gender, names_to = 'variable') %>% 
  mutate(gender = ifelse(gender == 1, 'Female (n = 901)', 'Male (n = 406)'))


desc_age <- filter(desc, variable == 'age')

male_values <- desc_age[desc_age$gender == 'Male (n = 406)', ]$value
female_values <- desc_age[desc_age$gender != 'Male (n = 406)', ]$value

desc_age$age_rel <- sapply(seq(nrow(desc_age)), function(i) {
  row <- desc_age[i, ]
  is_male <- row$gender == 'Male (n = 406)'
  ifelse(is_male, mean(row$value == male_values), mean(row$value == female_values))
})

p1 <- ggplot(unique(desc_age), aes(x = value, y = age_rel, fill = gender)) +
  geom_bar(stat = 'identity', position = position_dodge(1), width = 1) +
  scale_fill_manual('', values = cols) +
  scale_x_continuous('Age', breaks = scales::pretty_breaks(n = 10)) +
  ylab('Relative frequency') +
  ggtitle('Age') +
  custom_theme +
  remove_grid

plot_bar <- function(desc, variable, title, labels, colors = cols) {
  d <- desc[desc$variable == variable, ]
  
  male_values <- d[d$gender == 'Male (n = 406)', ]$value
  female_values <- d[d$gender != 'Male (n = 406)', ]$value
  
  d$rel <- sapply(seq(nrow(d)), function(i) {
    row <- d[i, ]
    is_male <- row$gender == 'Male (n = 406)'
    ifelse(is_male, mean(row$value == male_values), mean(row$value == female_values))
  })
  
  ggplot(unique(d), aes(x = value, y = rel, fill = gender)) +
    # geom_bar(position = position_dodge(0.9)) +
    geom_bar(stat = 'identity', position = position_dodge(1)) +#, width = 1) +
    scale_fill_manual('', values = colors) +
    scale_x_continuous('', labels = labels, breaks = seq(length(labels))) +
    ylab('Relative frequency') +
    ggtitle(title) +
    custom_theme +
    remove_grid +
    coord_flip() +
    guides(fill = FALSE)
}

p2 <- plot_bar(desc, 'education', 'Education', education_labels) + ylim(c(0, 0.30))
p3 <- plot_bar(desc, 'income', 'Income', income_labels) + ylim(c(0, 0.18))

# cairo_pdf('Figures/Demographics.pdf', width = 9, height = 8)
grid.arrange(
  p1, p2, p3,
  layout_matrix = rbind(c(1, 1), c(2, 3))
)

# dev.off()

Representativeness

We now compare our sample to data from Statistik Austria (https://bit.ly/3qgYxsN; “Population 15 years and older by highest level of education completed by sex and age, 2019”, Excel file), which provides data on all Austrians, to assess the representativeness of our sample in terms of age, gender, and education.

Since our age variable is more fine-grained, we aggregate it to match Statistik Austria’s reporting. Note that we did not ask for post-secondary non-tertiary or short tertiary education, which are included in Statistik Austria’s reporting.

library('readxl')

datsa <- read_excel('Data/statistik-austria.xlsx', sheet = 2)

datsa_long <- datsa %>%
  pivot_longer(
    !(gender | age_group | total),
    names_to = 'education_group',
    values_to = 'count'
  ) %>%
  select(-total) %>% 
  mutate(
    education_group = factor(education_group, levels = unique(education_group))
  )

Here we compare the marginal distribution of age across the sexes in the Austrian general population to the sample distribution.

create_age_groups <- function(dat) {
  age_groups <- c('15 - 29', '30 - 49', '50 - 64', '65 - 84', '85 or older')
  
  dat[['age_group']] <- ifelse(
    dat[['age']] <= 29, age_groups[1],
    ifelse(
      dat[['age']] <= 49, age_groups[2],
      ifelse(
        dat[['age']] <= 64, age_groups[3],
        ifelse(
          dat[['age']] <= 84, age_groups[4], age_groups[5]
        )
      )
    )
  )
  
  dat
}

demo <- select(dat, age, gender, income, education) %>% 
  mutate(gender = ifelse(gender == 1, 'female', 'male'))

demo <- create_age_groups(demo)


# Statistik Austria data from https://bit.ly/3qgYxsN
gender_age_SA <- datsa %>%
  group_by(gender, age_group) %>%
  summarize(total = total) %>% 
  group_by(gender) %>% 
  mutate(pop_prop = round(total / sum(total), 4)) %>% 
  select(-total)

gender_age_obs <- demo %>%
  group_by(gender, age_group) %>%
  summarize(total = n()) %>% 
  group_by(gender) %>% 
  mutate(obs_prop = round(total / sum(total), 4)) %>% 
  select(-total)

gender_age <- full_join(gender_age_SA, gender_age_obs)
gender_age
## # A tibble: 10 x 4
## # Groups:   gender [2]
##    gender age_group   pop_prop obs_prop
##    <chr>  <chr>          <dbl>    <dbl>
##  1 female 15 - 29       0.194    0.268 
##  2 female 30 - 49       0.307    0.407 
##  3 female 50 - 64       0.253    0.236 
##  4 female 65 - 84       0.206    0.0855
##  5 female 85 or older   0.0391   0.0033
##  6 male   15 - 29       0.215    0.367 
##  7 male   30 - 49       0.325    0.303 
##  8 male   50 - 64       0.262    0.224 
##  9 male   65 - 84       0.178    0.106 
## 10 male   85 or older   0.02    NA

Below we look at education across gender.

create_education_groups <- function(dat) {
  
  education_groups <- c('primary', 'lower_secondary', 'upper_secondary', 'bachelor', 'master', 'doctorate')
  
  dat[['education_group']] <- ifelse(
  dat[['education']] == 1, education_groups[1],
    ifelse(
      dat[['education']] == 2, education_groups[2],
      ifelse(
        dat[['education']] %in% c(3, 4, 5), education_groups[3],
        ifelse(
          dat[['education']] == 6, education_groups[4],
          ifelse(
            dat[['education']] == 7, education_groups[5], education_groups[6]
          )
        )
      )
    )
  )
  
  dat[['education_group']] <- factor(
    dat[['education_group']],
    levels = c('primary', 'lower_secondary', 'upper_secondary', 'bachelor', 'master', 'doctorate')
  )
  dat
}

demo <- create_education_groups(demo)

# Statistik Austria data from https://bit.ly/3qgYxsN
gender_education_SA <- datsa_long %>%
  group_by(gender, education_group) %>%
  summarize(total = sum(count)) %>% 
  group_by(gender) %>% 
  mutate(
    pop_prop = round(total / sum(total), 4),
    education_group_coarse = ifelse(
      education_group %in% c('lower_secondary_1', 'lower_secondary_2'),'lower_secondary',
      ifelse(
        education_group %in% c('upper_secondary_1', 'upper_secondary_2'),
        'upper_secondary', as.character(education_group)
      )
    )
  ) %>% 
  select(-total, -education_group) %>% 
  group_by(gender, education_group_coarse) %>% 
  summarize(pop_prop = sum(pop_prop)) %>% 
  mutate(
    education_group = factor(
      education_group_coarse,
      levels = c(
        'primary', 'lower_secondary', 'upper_secondary',
        'post_secondary_non_tertiary', 'short_tertiary', 'bachelor', 'master', 'doctorate'
      )
    )
  ) %>% 
  select(gender, education_group, pop_prop) %>%
  arrange(gender, education_group)

gender_education_obs <- demo %>%
  group_by(gender, education_group) %>%
  summarize(total = n()) %>% 
  group_by(gender) %>% 
  mutate(obs_prop = round(total / sum(total), 4)) %>% 
  select(-total)

gender_education <- full_join(gender_education_SA, gender_education_obs)
gender_education
## # A tibble: 16 x 4
## # Groups:   gender [2]
##    gender education_group             pop_prop obs_prop
##    <chr>  <fct>                          <dbl>    <dbl>
##  1 female primary                       0.0762   0.0078
##  2 female lower_secondary               0.220    0.0499
##  3 female upper_secondary               0.427    0.520 
##  4 female post_secondary_non_tertiary   0.0336  NA     
##  5 female short_tertiary                0.119   NA     
##  6 female bachelor                      0.0291   0.160 
##  7 female master                        0.0883   0.212 
##  8 female doctorate                     0.0072   0.0499
##  9 male   primary                       0.0372   0.0074
## 10 male   lower_secondary               0.169    0.0887
## 11 male   upper_secondary               0.510    0.510 
## 12 male   post_secondary_non_tertiary   0.009   NA     
## 13 male   short_tertiary                0.148   NA     
## 14 male   bachelor                      0.0202   0.0887
## 15 male   master                        0.0943   0.229 
## 16 male   doctorate                     0.0129   0.0764

Because we did not assess short tertiary education, and in fact some responses that we now coded as ‘upper_secondary’ education (e.g., certain types of ‘Handelsakademien’, depending on the length of the schooling) are actually short tertiary education, we merge short tertiary education (as well as post-secondary non-tertiary) into upper secondary education in the Statistik Austria data. This makes the survey weighting we do below possible, because it assumes no missing data. We stratify according to the joint distribution of gender, age, and education; we prepare this here.

datsa$lower_secondary <- datsa$lower_secondary_1 + datsa$lower_secondary_2
datsa$upper_secondary <- c(
  datsa$upper_secondary_1 + datsa$upper_secondary_2 +
  datsa$post_secondary_non_tertiary + datsa$short_tertiary
)

pop_joint <- datsa[, c(1, 2, 4, 14, 15, 11, 12, 13)]
pop_joint <- pop_joint %>% 
  pivot_longer(
    !(gender | age_group),
    names_to = 'education_group',
    values_to = 'Freq'
  ) %>% 
  mutate(
    gender = ifelse(gender == 'female', 1, 2),
    education_group = factor(
      education_group,
      levels = c(
        'primary', 'lower_secondary', 'upper_secondary', 'bachelor', 'master', 'doctorate')
    )
  )

# Population joint distribution of gender, age, and education
pop_joint
## # A tibble: 60 x 4
##    gender age_group education_group   Freq
##     <dbl> <chr>     <fct>            <dbl>
##  1      2 15 - 29   primary          30556
##  2      2 15 - 29   lower_secondary 252453
##  3      2 15 - 29   upper_secondary 453927
##  4      2 15 - 29   bachelor         36496
##  5      2 15 - 29   master           25608
##  6      2 15 - 29   doctorate          451
##  7      2 30 - 49   primary          19481
##  8      2 30 - 49   lower_secondary 177582
##  9      2 30 - 49   upper_secondary 787578
## 10      2 30 - 49   bachelor         34841
## # … with 50 more rows

TCM, Homeopathy, Accupuncture Use

Descriptives on all participants

We first give descriptives for all participants (rather than only those who are familiar with TCM, see below). We remove participants who did not answer the relevant question, which results in 19 exclusions.

dat_var_all <- dat %>% 
  filter(
    !is.na(disease), !is.na(tcm_known), !is.na(tcm_use),
    !is.na(globuli_use), !is.na(globuli_pass), !is.na(needle_use)
  )

nrow(dat) - nrow(dat_var_all)
## [1] 19
library('papaja')

tab_use_all <- dat_var_all %>% 
  group_by(gender) %>% 
  summarize(
    # disease_m = mean(disease == 1),
    tcm_known_m = mean(tcm_known == 1),
    tcm_use_m = mean(tcm_use == 1),
    globuli_use_m = mean(globuli_use == 1),
    globuli_pass_m = mean(globuli_pass == 1),
    needle_use_m = mean(needle_use == 1),
  ) %>% 
  mutate_all(
    round, 3
  ) %>% 
  mutate(
    gender = ifelse(gender == 1, 'Female', 'Male')
  )

cnames <- c(
  'gender', 'Knows TCM', 'Has used TCM', 'Has used Homeopathy',
  'Has passed on Homeopathy', 'Has used Accupuncture'
)
colnames(tab_use_all) <- cnames

apa_table(
  tab_use_all, caption = 'Table 1. Summarizes TCM, Homeopathy, and Accupuncture use.',
  digits = 3
)
(#tab:unnamed-chunk-16)
Table 1. Summarizes TCM, Homeopathy, and Accupuncture use.
gender Knows TCM Has used TCM Has used Homeopathy Has passed on Homeopathy Has used Accupuncture
Female 0.955 0.628 0.682 0.439 0.611
Male 0.908 0.414 0.409 0.185 0.394

Below we give the descriptives weighted according to the Statistik Austria joint population distribution of gender, age, and education.

library('survey')

# Add 'age_group' and 'education_group' columns
dat_var_all <- create_age_groups(dat_var_all)
dat_var_all <- create_education_groups(dat_var_all)

get_prop <- function(formula, weighting) {
  tab <- svytable(formula, weighting, round = TRUE)
  row <- (tab / rowSums(tab))[, 1]
  round(as.numeric(row), 3)
}

# Create the survey design
design <- svydesign(ids = ~1, data = dat_var_all, weights = NULL)
ps <- postStratify(design, ~gender + age_group + education_group, pop_joint, partial = TRUE)

tab_use_all_weighted <- data.frame(
  'gender' = c('Female', 'Male'),
  'Knows TCM' = get_prop(~gender + tcm_known, ps),
  'Has used TCM' = get_prop(~gender + tcm_use, ps),
  'Has used Homeopathy' = get_prop(~gender + globuli_use, ps),
  'Has passed on Homeopathy' = get_prop(~gender + globuli_pass, ps),
  'Has used Accupuncture' = get_prop(~gender + needle_use, ps)
)
colnames(tab_use_all_weighted) <- cnames

apa_table(
  tab_use_all_weighted,
  caption = 'Table 1. Summarizes TCM, Homeopathy, and Accupuncture using survey weights.',
  digits = 3
)
(#tab:unnamed-chunk-17)
Table 1. Summarizes TCM, Homeopathy, and Accupuncture using survey weights.
gender Knows TCM Has used TCM Has used Homeopathy Has passed on Homeopathy Has used Accupuncture
Female 0.899 0.583 0.716 0.434 0.619
Male 0.906 0.512 0.451 0.187 0.486

Descriptives on participants who know TCM

Here we remove all participants who do not know any or either of TCM, homeopathy, or accupuncture. We also remove those who do not know the answer to the relevant questions (e.g., those that do not know how often they use TCM). This removes 232 participants.

Since we have tcm_money, which indicates how much money they have spent on TCM, we do not include tcm_moneyrel, which indicates how much money relative to their other medical spending they have spent on TCM. tcm_moneyrel is also only available for people who took the online survey.

dat_clean <- dat %>% 
  select(-tcm_moneyrel) %>% 
  filter(
    # Remove people who don't know TCM or who don't know the answer
    tcm_known != 2,
    !(tcm_use %in% c(3, 4)),
    !(tcm_frequency %in% c(6, 7)),
    !(tcm_trust %in% c(6, 7)),
    !(tcm_science %in% c(6, 7)),
    !(tcm_money %in% c(8, 9)),
    
    # Remove people who don't know homeopathy or who don't know the answer
    !(globuli_use %in% c(3, 4)),
    !(globuli_pass %in% c(3, 4)),
    
    # Remove people who don't know accupuncture or who don't know the answer
    !(needle_use %in% c(3, 4)),
    !(needle_frequency %in% c(6, 7))
  )

nrow(dat) - nrow(dat_clean)
## [1] 232

We recode the variables so that an increase in number is associated with an increase in whatever the variable measures (e.g., money or agreement). We also recode binary variables so that 0 indicates absence or negation. We also remove those participants who had a NA in any of their answers, because otherwise the proportions in the descriptives table refer to different population sizes. This excludes 13 participants. (Note that there were no NAs in education, income, or med_money; these variables will be used later.)

dat_var <- dat_clean %>% 
  select(
    gender, age, disease, starts_with('tcm'),
    starts_with('globuli'), starts_with('vax'),
    starts_with('needle'), income, education, med_money
  ) %>% 
  drop_na() %>% # removes 13 people
  mutate(
    # tcm_trust, tcm_science, vax_use, vax_fresh need to be
    # reverse coded so that larger means stronger agreement
    tcm_trust = 6 - tcm_trust,
    tcm_science = 6 - tcm_science,
    vax_use = 6 - vax_use,
    vax_fresh = 6 - vax_fresh,
    
    # binary indicators need to be recoded so that 1 means yes
    disease = 3 - disease,
    tcm_known = 3 - tcm_known,
    tcm_use = 3 - tcm_use,
    globuli_use = 3 - globuli_use,
    globuli_pass = 3 - globuli_pass,
    needle_use = 3 - needle_use
  )

# Recode variables to start at 0, not at 1 (except age)
dat_var <- as_tibble(dat_var - 1) %>%  mutate(age = age + 1)
nrow(dat_clean) - nrow(dat_var)
## [1] 13

The table below shows the proportion of men and women who know TCM, have used TCM, have used homeopathy, passed on homeopathy to others, or have used accupuncture in the last three years.

tab_use <- dat_var %>% 
  group_by(gender) %>% 
  summarize(
    tcm_known_m = mean(tcm_known == 1),
    tcm_use_m = mean(tcm_use == 1),
    globuli_use_m = mean(globuli_use == 1),
    globuli_pass_m = mean(globuli_pass == 1),
    needle_use_m = mean(needle_use == 1),
  ) %>% 
  mutate_all(
    round, 3
  ) %>% 
  mutate(
    gender = ifelse(gender == 0, 'Female', 'Male')
  )

colnames(tab_use) <- c(
  'Gender', 'Knows TCM', 'Has used TCM', 'Has used Homeopathy',
  'Has passed on Homeopathy', 'Has used Accupuncture'
)

apa_table(
  tab_use, caption = 'Table 1. Summarizes TCM, Homeopathy, and Accupuncture use.',
  digits = 3
)
(#tab:unnamed-chunk-20)
Table 1. Summarizes TCM, Homeopathy, and Accupuncture use.
Gender Knows TCM Has used TCM Has used Homeopathy Has passed on Homeopathy Has used Accupuncture
Female 1.000 0.683 0.703 0.464 0.652
Male 1.000 0.470 0.429 0.211 0.432

We again present the descriptive statistics using the same survey weighting procedure as above.

# Add 'age_group' and 'education_group' columns
dat_var <- create_age_groups(dat_var)
dat_var <- create_education_groups(dat_var)

# Adjust 'gender' in population joint distribution (0-1 coding instead of 1-2 coding)
pop_joint$gender <- pop_joint$gender - 1

# Create the survey design
design <- svydesign(ids = ~1, data = dat_var, weights = NULL)
ps <- postStratify(design, ~gender + age_group + education_group, pop_joint, partial = TRUE)

# Since we recoded the variables above, we need to adjust this function
# The value '1' now indicates use / knowledge
get_prop <- function(formula, weighting) {
  tab <- svytable(formula, weighting, round = TRUE)
  
  if (ncol(tab) == 1) {
    row <- (tab / rowSums(tab))[, 1]
  } else {
    row <- (tab / rowSums(tab))[, 2]
  }
  
  round(as.numeric(row), 3)
}

tab_use_weighted <- data.frame(
  'gender' = c('Female', 'Male'),
  'Knows TCM' = get_prop(~gender + tcm_known, ps),
  'Has used TCM' = get_prop(~gender + tcm_use, ps),
  'Has used Homeopathy' = get_prop(~gender + globuli_use, ps),
  'Has passed on Homeopathy' = get_prop(~gender + globuli_pass, ps),
  'Has used Accupuncture' = get_prop(~gender + needle_use, ps)
)
colnames(tab_use_weighted) <- cnames

apa_table(
  tab_use_weighted,
  caption = 'Table 1. Summarizes TCM, Homeopathy, and Accupuncture using survey weights.',
  digits = 3
)
(#tab:unnamed-chunk-21)
Table 1. Summarizes TCM, Homeopathy, and Accupuncture using survey weights.
gender Knows TCM Has used TCM Has used Homeopathy Has passed on Homeopathy Has used Accupuncture
Female 1.000 0.680 0.739 0.464 0.687
Male 1.000 0.509 0.436 0.236 0.492

TCM and Accupuncture Frequency

The table below shows how often men and women have used TCM in the last three years.

create_table <- function(dat, var, colname, labels) {
  tab <- table(dat$gender, dat[[var]], dnn = c('Gender', colname))
  rownames(tab) <- c('Female', 'Male')
  colnames(tab) <- labels
  
  # Normalize across columns
  tab <- t(apply(tab, 1, function(x) round(x / sum(x), 3)))
  tab
}

frequency_labels <- c(
  '0', '1 - <5', '5 - <10', '10 - <20', '>= 20'
)

tab_tcm <- create_table(dat_var, 'tcm_frequency', 'Frequency of TCM Use', frequency_labels)
apa_table(
  tab_tcm, caption = 'Table 2. Summarizes Frequency of TCM use in the last 3 years.',
  digits = 3
)
(#tab:unnamed-chunk-22)
Table 2. Summarizes Frequency of TCM use in the last 3 years.
0 1 - <5 5 - <10 10 - <20 >= 20
Female 0.411 0.262 0.117 0.087 0.123
Male 0.647 0.174 0.063 0.032 0.085

We again present the weighted results.

create_weighted_table <- function(formula, weighting, cnames) {
  
  tab <- svytable(formula, weighting, round = TRUE)
  tab <- round(tab / rowSums(tab), 3)
  
  rownames(tab) <- c('Female', 'Male')
  colnames(tab) <- cnames
  class(tab) <- c('matrix', 'array')
  tab
}

tab_tcm_weighted <- create_weighted_table(~gender + tcm_frequency, ps, frequency_labels)
apa_table(
  tab_tcm_weighted, caption = 'Table 2. Summarizes Frequency of TCM use in the last 3 years using survey weights.',
  digits = 3
)
(#tab:unnamed-chunk-23)
Table 2. Summarizes Frequency of TCM use in the last 3 years using survey weights.
0 1 - <5 5 - <10 10 - <20 >= 20
Female 0.411 0.271 0.121 0.093 0.104
Male 0.605 0.145 0.084 0.056 0.109

The table below shows how often men and women have used accupuncture in the last three years.

tab_needle <- create_table(
  dat_var, 'needle_frequency', 'Frequency of Accupuncture Use',
  frequency_labels
)

apa_table(
  tab_needle, caption = 'Table 3. Summarizes Accupuncture use in the last 3 years.',
  digits = 3
)
(#tab:unnamed-chunk-24)
Table 3. Summarizes Accupuncture use in the last 3 years.
0 1 - <5 5 - <10 10 - <20 >= 20
Female 0.533 0.248 0.083 0.066 0.070
Male 0.732 0.098 0.063 0.044 0.063

We again present the weighted results.

tab_needle_weighted <- create_weighted_table(~gender + needle_frequency, ps, frequency_labels)
apa_table(
  tab_needle_weighted,
  caption = 'Table 3. Summarizes Accupuncture use in the last 3 years using survey weights.',
  digits = 3
)
(#tab:unnamed-chunk-25)
Table 3. Summarizes Accupuncture use in the last 3 years using survey weights.
0 1 - <5 5 - <10 10 - <20 >= 20
Female 0.514 0.267 0.080 0.071 0.069
Male 0.697 0.124 0.074 0.073 0.031

Trust in TCM and Science of TCM

The table below shows the trust in doctors with TCM training across the genders.

agree_labels <- c(
   'Do not Agree', 'Rather Disagree',
   'Partly Agree', 'Mostly Agree', 'Completely Agree'
)

tab_trust <- create_table(
  dat_var, 'tcm_trust', 'Trust in Doctors with TCM training',
  agree_labels
)

apa_table(
  tab_trust, caption = 'Table 6. Summarizes Trust in doctors with TCM training.',
  digits = 3
)
(#tab:unnamed-chunk-26)
Table 6. Summarizes Trust in doctors with TCM training.
Do not Agree Rather Disagree Partly Agree Mostly Agree Completely Agree
Female 0.048 0.040 0.102 0.187 0.623
Male 0.117 0.095 0.126 0.218 0.445

We again present the weighted results.

tab_trust_weighted <- create_weighted_table(~gender + tcm_trust, ps, agree_labels)
apa_table(
  tab_trust_weighted, caption = 'Table 6. Summarizes Trust in doctors with TCM training using survey weights.',
  digits = 3
)
(#tab:unnamed-chunk-27)
Table 6. Summarizes Trust in doctors with TCM training using survey weights.
Do not Agree Rather Disagree Partly Agree Mostly Agree Completely Agree
Female 0.043 0.029 0.095 0.204 0.629
Male 0.124 0.064 0.110 0.215 0.487

The table below shows the strength with which people believe / agree that TCM is scientific across the genders.

tab_science <- create_table(
  dat_var, 'tcm_science', 'Believes TCM to be scientific',
  agree_labels
)

apa_table(
  tab_science, caption = 'Table 7. Summarizes the agreement with TCM being scientific.',
  digits = 3
)
(#tab:unnamed-chunk-28)
Table 7. Summarizes the agreement with TCM being scientific.
Do not Agree Rather Disagree Partly Agree Mostly Agree Completely Agree
Female 0.089 0.090 0.193 0.248 0.380
Male 0.189 0.114 0.237 0.249 0.211

We again present the weighted results.

tab_science_weighted <- create_weighted_table(~gender + tcm_science, ps, agree_labels)
apa_table(
  tab_science_weighted, caption = 'Table 7. Summarizes the agreement with TCM being scientific using survey weights.',
  digits = 3
)
(#tab:unnamed-chunk-29)
Table 7. Summarizes the agreement with TCM being scientific using survey weights.
Do not Agree Rather Disagree Partly Agree Mostly Agree Completely Agree
Female 0.080 0.079 0.176 0.255 0.409
Male 0.181 0.103 0.218 0.261 0.236

TCM and Medical Expenses

We remove participants who did not know there medical expenses. This removes 34 participants. The table below shows medical expenses across the genders.

dat_varm <- filter(dat_var, med_money != 7)
nrow(dat_var) - nrow(dat_varm)
## [1] 34
money_labels <- c(
  '0', '1 - <100', '100 - <250', '250 - <500',
  '500 - <750', '750 - <1000', '>= 1000'
)

tab_med <- create_table(dat_varm, 'med_money', 'Medical Expenses', money_labels)
apa_table(tab_med, caption = 'Table 4. Summarizes Medical expenses in the last 3 years.', digits = 3)
(#tab:unnamed-chunk-31)
Table 4. Summarizes Medical expenses in the last 3 years.
0 1 - <100 100 - <250 250 - <500 500 - <750 750 - <1000 >= 1000
Female 0.053 0.129 0.131 0.188 0.149 0.100 0.251
Male 0.110 0.153 0.169 0.169 0.097 0.097 0.205

We again present the weighted results.

# Create the survey design with new data
design <- svydesign(ids = ~1, data = dat_varm, weights = NULL)
ps <- postStratify(design, ~gender + age_group + education_group, pop_joint, partial = TRUE)

tab_med_weighted <- create_weighted_table(~gender + med_money, ps, money_labels)
apa_table(
  tab_med_weighted, caption = 'Table 4. Summarizes Medical expenses in the last 3 years using survey weights.',
  digits = 3
)
(#tab:unnamed-chunk-32)
Table 4. Summarizes Medical expenses in the last 3 years using survey weights.
0 1 - <100 100 - <250 250 - <500 500 - <750 750 - <1000 >= 1000
Female 0.067 0.130 0.139 0.159 0.153 0.087 0.265
Male 0.114 0.157 0.167 0.202 0.079 0.103 0.178

The table below shows TCM expenses across the genders.

tab_tcm <- create_table(dat_var, 'tcm_money', 'TCM Expenses', money_labels)
apa_table(tab_tcm, caption = 'Table 5. Summarizes TCM expenses in the last 3 years.', digits = 3)
(#tab:unnamed-chunk-33)
Table 5. Summarizes TCM expenses in the last 3 years.
0 1 - <100 100 - <250 250 - <500 500 - <750 750 - <1000 >= 1000
Female 0.434 0.111 0.111 0.145 0.075 0.046 0.078
Male 0.666 0.076 0.076 0.044 0.054 0.044 0.041

We again present the weighted results.

tab_tcm_weighted <- create_weighted_table(~gender + tcm_money, ps, money_labels)
apa_table(
  tab_tcm_weighted, caption = 'Table 5. Summarizes TCM expenses in the last 3 years using survey weights.',
  digits = 3
)
(#tab:unnamed-chunk-34)
Table 5. Summarizes TCM expenses in the last 3 years using survey weights.
0 1 - <100 100 - <250 250 - <500 500 - <750 750 - <1000 >= 1000
Female 0.425 0.099 0.128 0.132 0.075 0.054 0.088
Male 0.599 0.070 0.139 0.053 0.056 0.051 0.032

Statistical Analysis

Preprocessing

Since we analyze data from participants who all know TCM, we remove the variable tcm_known from the dataframe.

table(dat_var$tcm_known) # all participants know TCM
## 
##    1 
## 1062
dat_stat <- select(dat_var, -tcm_known)

dim(dat_stat)
## [1] 1062   19

This data holds 1062 participants, 156 of which wished to not report their income group.

table(dat_stat$income) # 11 means not reported
## 
##   0   1   2   3   4   5   6   7   8   9  10  11 
##  68  18  63 109 152 151 147  66  45  23  64 156

Similarly, 34 people did not know their medical expenses.

table(dat_stat$med_money) # 7 means not reported
## 
##   0   1   2   3   4   5   6   7 
##  72 140 146 187 137 102 244  34

We recode these to be NA and will impute these data later.

dat_stat <- dat_stat %>% 
  mutate(
    income = ifelse(income == 11, NA, income), # 11 is NA
    med_money = ifelse(med_money == 7, NA, med_money) # 7 is NA
  ) %>% 
  # re-order columns for later
  dplyr::select(
    gender, age, income, education, med_money, disease,
    tcm_use, tcm_frequency, tcm_science, tcm_trust, tcm_money,
    globuli_use, globuli_pass, vax_use, vax_fresh, needle_use, needle_frequency
  )

sum(is.na(dat_stat$income)) # 156 reported no income
## [1] 156
sum(is.na(dat_stat$med_money)) # 34 reported no medical expenses
## [1] 34

The use variables do not add more information than the frequency variables, and so we remove them from subsequent analysis.

with(dat_stat, table(needle_use, needle_frequency))
##           needle_frequency
## needle_use   0   1   2   3   4
##          0 438   1   0   0   0
##          1 191 215  82  63  72
with(dat_stat, table(tcm_use, tcm_frequency))
##        tcm_frequency
## tcm_use   0   1   2   3   4
##       0 403   1   0   0   0
##       1 108 249 107  75 119
dat_stat <- dplyr::select(dat_stat, -needle_use, -tcm_use)

Bayesian Gaussian Copula Graphical Model

We use a Bayesian Gaussian copula graphical model to explore the multivariate dependencies in our data. Most of our variables are ordinal and binary, except for age, which we treat as continuous. We use a Gaussian copula graphical model which allows us to model each variable on its proper domain. This is implemented in the R package BGGM (Williams & Mulder, 2020).

apply(dat_stat, 2, function(x) length(unique(x)))
##           gender              age           income        education 
##                2               70               12                8 
##        med_money          disease    tcm_frequency      tcm_science 
##                8                2                5                5 
##        tcm_trust        tcm_money      globuli_use     globuli_pass 
##                5                7                2                2 
##          vax_use        vax_fresh needle_frequency 
##                5                5                5
library('BGGM')
library('qgraph')
library('ggplot2')
library('RColorBrewer')

# Grouped into A, B, C, D, E groups (see below)
node_names <- c(
  'Gender', 'Age', 'Income', 'Education', 'Medical expenses', 'Chronic disease',
  'TCM usage frequency', 'Perception of scientific support', 'Trust in TCM-certified MDs',
  'TCM expenses', 'Homeopathy usage', 'Homeopathy propagation', 'Vaccination usage',
  'Booster vaccination', 'Acupuncture usage frequency'
)
  
plot_graph <- function(mat, errors, node_names, main = NULL, legend = TRUE, ...) {
  # Groups: Individual variables, TCM, Homeopathy, Vaccination, Accupuncture
  cols <- brewer.pal(5, 'Set3')
  
  groups <- c(
    rep('A. Individual variables', 6),
    rep('B. TCM', 4),
    rep('C. Homeopathy', 2),
    rep('D. Vaccination', 2),
    rep('E. Acupuncture', 1)
  )
  
  qgraph(
    # mat, edge.color = ifelse(mat < 0, cols[4], cols[5]),
    mat, edge.color = ifelse(mat < 0, 'darkred', 'darkblue'),
    pie = errors,
    layout = 'circle',
    pieColor = 'skyblue',
    color = cols,
    groups = groups,
    nodeNames = node_names,
    legend.mode = 'style1',
    legend = legend, ...
  )
  
  if (!is.null(main)) {
    title(main, font.main = 1, line = 2.8, cex.main = 1.8)
  }
}

We impute the missing data using mice.

library('mice')

dat_c <- mice(dat_stat, m = 10, printFlag = FALSE)
dat_c <- complete(dat_c)

We present two analyses: one with and one without post-stratification.

Without post-stratification

We first present the analysis without post-stratification. We visualize the network below. Note that relations between nodes represent partial correlations.

if (!file.exists('fitted-bggm.RDS')) {
  m <- estimate(dat_c, iter = 4000, type = 'mixed', analytic = FALSE, impute = FALSE)
  pred <- predictability(m, iter = 1000)
  saveRDS(list('pred' = pred, 'm' = m), 'fitted-bggm.RDS')
} else {
  obj <- readRDS('fitted-bggm.RDS')
  m <- obj$m
  pred <- obj$pred
}

mat <- m$pcor_mat
errors <- sapply(pred$scores, mean)

# pdf('Figures/Bayesian-pcor.pdf', width = 6, height = 6)
plot_graph(
  mat, errors, node_names,
  main = 'Partial correlation network', legend.cex = 0.50, legend = F
)

# dev.off()

We visualize the correlation network below.

library('corpcor')

diag(mat) <- 1
mat_cor <- pcor2cor(mat)

# pdf('Figures/Bayesian-cor2.pdf', width = 10, height = 8)
plot_graph(
  mat_cor, errors, node_names,
  main = 'Correlation network', legend.cex = 0.50, legend = T
)

# dev.off()

We visualize the partial correlations between all variables below.

mapping <- list()
for (i in seq(length(node_names))) {
  mapping[colnames(dat_c)[i]] <- i
}

convert <- function(s) {
  terms <- strsplit(s, '--')[[1]]
  paste0(mapping[[terms[1]]], '-', mapping[[terms[2]]])
}

ss <- summary(m)
ds <- ss$dat_results
ds$Relation <- sapply(ss$dat_results$Relation, convert)
ds$Relation <- reorder(ds$Relation, ds$Post.mean)

# pdf('Figures/Partial-Correlations.pdf', width = 10, height = 5)
ggplot(ds, aes(x = Relation, y = Post.mean)) +
  geom_hline(aes(yintercept = 0), color = 'gray76') +
  geom_errorbar(aes(ymin = Cred.lb, ymax = Cred.ub), cex = 0.50) +
  geom_point(size = 0.75) +
  scale_y_continuous(limits = c(-1, 1), breaks = seq(-1, 1, 0.50)) +
  xlab('Pairs of nodes') +
  ylab('Partial correlation') +
  ggtitle('Partial correlations between all variables') +
  custom_theme +
  theme(axis.text.x = element_text(angle = 90, size = 6))

# dev.off()

Below the point estimates and confidence intervals.

ss
## BGGM: Bayesian Gaussian Graphical Models 
## --- 
## Type: mixed 
## Analytic: FALSE 
## Formula:  
## Posterior Samples: 4000 
## Observations (n):
## Nodes (p): 15 
## Relations: 105 
## --- 
## Call: 
## estimate(Y = dat_c, type = "mixed", analytic = FALSE, iter = 4000, 
##     impute = FALSE)
## --- 
## Estimates:
##                         Relation Post.mean Post.sd Cred.lb Cred.ub
##                      gender--age    -0.121   0.046  -0.210  -0.030
##                   gender--income     0.357   0.045   0.265   0.446
##                      age--income     0.458   0.030   0.398   0.516
##                gender--education    -0.226   0.045  -0.313  -0.137
##                   age--education    -0.101   0.036  -0.172  -0.029
##                income--education     0.429   0.032   0.363   0.492
##                gender--med_money    -0.100   0.053  -0.206   0.004
##                   age--med_money     0.040   0.038  -0.033   0.114
##                income--med_money     0.117   0.043   0.036   0.201
##             education--med_money     0.004   0.040  -0.075   0.083
##                  gender--disease    -0.072   0.059  -0.187   0.040
##                     age--disease     0.186   0.040   0.108   0.261
##                  income--disease    -0.022   0.047  -0.111   0.072
##               education--disease    -0.064   0.043  -0.149   0.020
##               med_money--disease     0.118   0.046   0.029   0.205
##            gender--tcm_frequency    -0.047   0.068  -0.176   0.087
##               age--tcm_frequency    -0.080   0.049  -0.177   0.016
##            income--tcm_frequency    -0.084   0.051  -0.186   0.014
##         education--tcm_frequency     0.067   0.047  -0.025   0.158
##         med_money--tcm_frequency    -0.177   0.049  -0.270  -0.078
##           disease--tcm_frequency     0.076   0.057  -0.037   0.191
##              gender--tcm_science     0.023   0.056  -0.091   0.128
##                 age--tcm_science     0.100   0.040   0.019   0.177
##              income--tcm_science    -0.010   0.044  -0.096   0.079
##           education--tcm_science    -0.083   0.042  -0.164   0.000
##           med_money--tcm_science    -0.064   0.046  -0.157   0.025
##             disease--tcm_science    -0.073   0.050  -0.173   0.026
##       tcm_frequency--tcm_science     0.062   0.057  -0.051   0.170
##                gender--tcm_trust    -0.073   0.057  -0.183   0.040
##                   age--tcm_trust    -0.050   0.042  -0.129   0.030
##                income--tcm_trust     0.061   0.047  -0.030   0.152
##             education--tcm_trust    -0.021   0.048  -0.112   0.075
##             med_money--tcm_trust     0.033   0.050  -0.065   0.132
##               disease--tcm_trust     0.081   0.053  -0.021   0.183
##         tcm_frequency--tcm_trust    -0.043   0.061  -0.162   0.076
##           tcm_science--tcm_trust     0.560   0.041   0.486   0.647
##                gender--tcm_money    -0.024   0.067  -0.153   0.107
##                   age--tcm_money     0.097   0.049   0.000   0.192
##                income--tcm_money     0.129   0.050   0.032   0.227
##             education--tcm_money    -0.090   0.048  -0.185   0.006
##             med_money--tcm_money     0.204   0.051   0.103   0.304
##               disease--tcm_money    -0.009   0.058  -0.123   0.104
##         tcm_frequency--tcm_money     0.637   0.034   0.569   0.700
##           tcm_science--tcm_money     0.070   0.056  -0.036   0.179
##             tcm_trust--tcm_money     0.108   0.060  -0.007   0.224
##              gender--globuli_use    -0.053   0.084  -0.207   0.120
##                 age--globuli_use    -0.122   0.058  -0.231  -0.006
##              income--globuli_use    -0.117   0.063  -0.241   0.006
##           education--globuli_use     0.010   0.064  -0.115   0.134
##           med_money--globuli_use     0.080   0.069  -0.062   0.207
##             disease--globuli_use     0.105   0.079  -0.042   0.263
##       tcm_frequency--globuli_use    -0.096   0.086  -0.261   0.074
##         tcm_science--globuli_use     0.155   0.073   0.018   0.299
##           tcm_trust--globuli_use     0.017   0.075  -0.119   0.166
##           tcm_money--globuli_use     0.159   0.084  -0.006   0.321
##             gender--globuli_pass    -0.183   0.078  -0.330  -0.027
##                age--globuli_pass     0.019   0.058  -0.096   0.130
##             income--globuli_pass     0.207   0.060   0.091   0.325
##          education--globuli_pass    -0.066   0.061  -0.183   0.054
##          med_money--globuli_pass     0.057   0.066  -0.075   0.187
##            disease--globuli_pass    -0.155   0.074  -0.301  -0.012
##      tcm_frequency--globuli_pass     0.127   0.076  -0.017   0.279
##        tcm_science--globuli_pass    -0.064   0.069  -0.200   0.074
##          tcm_trust--globuli_pass     0.098   0.070  -0.038   0.234
##          tcm_money--globuli_pass    -0.116   0.076  -0.271   0.026
##        globuli_use--globuli_pass     0.655   0.048   0.551   0.742
##                  gender--vax_use     0.097   0.076  -0.058   0.248
##                     age--vax_use    -0.059   0.055  -0.169   0.053
##                  income--vax_use    -0.055   0.062  -0.173   0.069
##               education--vax_use     0.060   0.059  -0.052   0.178
##               med_money--vax_use     0.231   0.053   0.125   0.330
##                 disease--vax_use    -0.021   0.072  -0.158   0.122
##           tcm_frequency--vax_use     0.053   0.071  -0.086   0.188
##             tcm_science--vax_use    -0.163   0.064  -0.286  -0.035
##               tcm_trust--vax_use     0.126   0.089  -0.020   0.366
##               tcm_money--vax_use    -0.040   0.073  -0.181   0.102
##             globuli_use--vax_use    -0.199   0.110  -0.381   0.059
##            globuli_pass--vax_use    -0.082   0.098  -0.279   0.103
##                gender--vax_fresh    -0.126   0.069  -0.258   0.013
##                   age--vax_fresh    -0.024   0.051  -0.123   0.075
##                income--vax_fresh     0.115   0.056   0.007   0.225
##             education--vax_fresh    -0.011   0.053  -0.115   0.091
##             med_money--vax_fresh    -0.100   0.052  -0.199   0.006
##               disease--vax_fresh     0.015   0.065  -0.112   0.142
##         tcm_frequency--vax_fresh    -0.052   0.067  -0.180   0.080
##           tcm_science--vax_fresh     0.082   0.059  -0.037   0.199
##             tcm_trust--vax_fresh    -0.107   0.065  -0.246   0.011
##             tcm_money--vax_fresh     0.009   0.066  -0.119   0.138
##           globuli_use--vax_fresh     0.160   0.085  -0.008   0.319
##          globuli_pass--vax_fresh    -0.026   0.083  -0.190   0.135
##               vax_use--vax_fresh     0.824   0.022   0.779   0.866
##         gender--needle_frequency     0.024   0.066  -0.107   0.151
##            age--needle_frequency    -0.016   0.050  -0.114   0.080
##         income--needle_frequency     0.008   0.050  -0.094   0.105
##      education--needle_frequency     0.052   0.047  -0.038   0.146
##      med_money--needle_frequency     0.150   0.048   0.055   0.240
##        disease--needle_frequency     0.019   0.057  -0.095   0.129
##  tcm_frequency--needle_frequency     0.384   0.047   0.291   0.472
##    tcm_science--needle_frequency     0.040   0.056  -0.071   0.149
##      tcm_trust--needle_frequency    -0.034   0.058  -0.148   0.078
##      tcm_money--needle_frequency     0.254   0.050   0.151   0.352
##    globuli_use--needle_frequency    -0.057   0.086  -0.222   0.118
##   globuli_pass--needle_frequency    -0.037   0.078  -0.188   0.117
##        vax_use--needle_frequency    -0.176   0.068  -0.305  -0.041
##      vax_fresh--needle_frequency     0.154   0.064   0.026   0.278
## ---

We visualize how much variance is explained for each variable by all variables combined.

library('latex2exp')

colnames(pred$Y) <- node_names

# pdf('Figures/Variance-Expained.pdf', width = 8, height = 5)
plot(pred, type = 'ridgeline') +
  xlab(TeX('Bayesian $$R^2$$')) +
  ggtitle('Variance explained') +
  ylab('') +
  guides(fill = FALSE) +
  scale_x_continuous(limits = c(0, 0.80), breaks = seq(0, 0.80, .2)) +
  custom_theme +
  theme(axis.text.x = element_text(angle = 0))

# dev.off()

Below are the posterior means and posterior standard deviations.

pred
## BGGM: Bayesian Gaussian Graphical Models 
## --- 
## Metric: Bayes R2
## Type: mixed 
## --- 
## Estimates:
## 
##                              Node Post.mean Post.sd Cred.lb Cred.ub
##                            Gender     0.357   0.043   0.277   0.450
##                               Age     0.441   0.028   0.386   0.497
##                            Income     0.462   0.025   0.413   0.512
##                         Education     0.323   0.026   0.272   0.378
##                  Medical expenses     0.251   0.035   0.183   0.321
##                   Chronic disease     0.176   0.059   0.088   0.314
##               TCM usage frequency     0.517   0.026   0.464   0.566
##  Perception of scientific support     0.512   0.034   0.454   0.584
##        Trust in TCM-certified MDs     0.336   0.024   0.284   0.384
##                      TCM expenses     0.666   0.023   0.624   0.714
##                  Homeopathy usage     0.410   0.034   0.332   0.463
##            Homeopathy propagation     0.534   0.028   0.483   0.587
##                 Vaccination usage     0.489   0.039   0.395   0.549
##               Booster vaccination     0.539   0.024   0.496   0.588
##       Acupuncture usage frequency     0.638   0.026   0.591   0.690

With post-stratification

BGGM does not yet support post-stratification in the estimation procedure. We therefore draw \(n\) observations from the data with replacement according to the weights defined by the population distribution of age, gender, and education, and estimate the model each time. This creates post-stratified estimates.

dat_ps <- create_education_groups(create_age_groups(dat_c))
dat_ps <- left_join(dat_ps, pop_joint) 

design2 <- svydesign(ids = ~1, data = dat_ps, weights = NULL)
ps2 <- postStratify(design2, ~gender + age_group + education_group, pop_joint, partial = TRUE)
dat_ps$weights <- weights(ps2)


if (!file.exists('fitted-bggm-ps.RDS')) {
  
  library('doParallel')
  registerDoParallel(cores = 6)
  
  times <- 250
  res <- foreach(i = seq(times), .combine = 'rbind') %dopar% {
    d <- slice_sample(dat_ps, n = nrow(dat_ps), replace = TRUE, weight_by = weights)
    d <- dplyr::select(d, -age_group, -education_group, -Freq, -weights)
    
    m <- estimate(d, iter = 2000, type = 'mixed', analytic = FALSE, impute = FALSE)
    pred <- predictability(m, iter = 1000)
    
    list('m' = m, 'pred' = pred)
  }
  
  saveRDS(res, 'fitted-bggm-ps.RDS')
  
} else {
  
  res <- readRDS('fitted-bggm-ps.RDS')
  
}

We combine the estimates and visualize the partial correlation network below.

library('abind')

pcor_post <- lapply(seq(nrow(res)), function(i) {
  x <- res[i, ]
  x$m$post_samp$pcors
})

errors_post <- lapply(seq(nrow(res)), function(i) {
  x <- res[i, ]
  do.call('cbind', x$pred$scores)
})

pcor_post <- abind(pcor_post)
errors_post <- do.call('rbind', errors_post)

mat_ps <- apply(pcor_post, c(1, 2), mean)
errors_ps <- apply(errors_post, 2, mean)

# pdf('Figures/Bayesian-pcor-ps.pdf', width = 6, height = 6)
plot_graph(
  mat_ps, errors_ps, node_names,
  main = 'Partial correlation network (post-stratified)', legend.cex = 0.50, legend = F
)

# dev.off()

We visualize the correlation network below.

mat_ps2 <- mat_ps
diag(mat_ps2) <- 1
mat_cor_ps <- pcor2cor(mat_ps2)

# pdf('Figures/Bayesian-cor2-ps.pdf', width = 10, height = 8)
plot_graph(
  mat_cor_ps, errors_ps, node_names,
  main = 'Correlation network (post-stratified)', legend.cex = 0.50, legend = TRUE
)

# dev.off()

We visualize the partial correlations between all variables below.

ms_ps <- res[1, ]$m
ms_ps$pcor_mat <- mat_ps
ms_ps$post_samp$pcors <- pcor_post
ms_ps$iter <- dim(pcor_post)[3] - (50 * 150)
# ms_ps$post_samp$fisher_z <- fisher_r_to_z(pcor_post)

ss_ps <- summary(ms_ps)
ds_ps <- ss_ps$dat_results
ds_ps$Relation <- sapply(ss_ps$dat_results$Relation, convert)
ds_ps$Relation <- reorder(ds_ps$Relation, ds_ps$Post.mean)

# pdf('Figures/Partial-Correlations-ps.pdf', width = 10, height = 5)
ggplot(ds_ps, aes(x = Relation, y = Post.mean)) +
  geom_hline(aes(yintercept = 0), color = 'gray76') +
  geom_errorbar(aes(ymin = Cred.lb, ymax = Cred.ub), cex = 0.50) +
  geom_point(size = 0.75) +
  scale_y_continuous(limits = c(-1, 1), breaks = seq(-1, 1, 0.50)) +
  xlab('Pairs of nodes') +
  ylab('Partial correlation') +
  ggtitle('Partial correlations between all variables (post-stratified)') +
  custom_theme +
  theme(axis.text.x = element_text(angle = 90, size = 6))

# dev.off()

Below the point estimates and confidence intervals.

ss_ps
## BGGM: Bayesian Gaussian Graphical Models 
## --- 
## Type: mixed 
## Analytic: FALSE 
## Formula:  
## Posterior Samples: 505000 
## Observations (n):
## Nodes (p): 15 
## Relations: 105 
## --- 
## Call: 
## estimate(Y = d, type = "mixed", analytic = FALSE, iter = 2000, 
##     impute = FALSE)
## --- 
## Estimates:
##                         Relation Post.mean Post.sd Cred.lb Cred.ub
##                      gender--age    -0.151   0.065  -0.280  -0.024
##                   gender--income     0.309   0.068   0.172   0.440
##                      age--income     0.382   0.047   0.291   0.472
##                gender--education    -0.172   0.073  -0.309  -0.020
##                   age--education    -0.121   0.053  -0.223  -0.015
##                income--education     0.329   0.060   0.210   0.442
##                gender--med_money    -0.049   0.074  -0.195   0.095
##                   age--med_money     0.074   0.055  -0.033   0.182
##                income--med_money     0.030   0.061  -0.092   0.148
##             education--med_money     0.008   0.060  -0.109   0.126
##                  gender--disease    -0.237   0.083  -0.399  -0.075
##                     age--disease     0.159   0.065   0.031   0.285
##                  income--disease    -0.036   0.072  -0.170   0.112
##               education--disease    -0.100   0.068  -0.232   0.037
##               med_money--disease     0.094   0.068  -0.043   0.226
##            gender--tcm_frequency     0.121   0.099  -0.075   0.316
##               age--tcm_frequency     0.027   0.075  -0.120   0.176
##            income--tcm_frequency    -0.186   0.078  -0.340  -0.033
##         education--tcm_frequency     0.058   0.073  -0.087   0.200
##         med_money--tcm_frequency    -0.155   0.084  -0.317   0.013
##           disease--tcm_frequency     0.051   0.096  -0.133   0.241
##              gender--tcm_science    -0.017   0.084  -0.183   0.146
##                 age--tcm_science     0.044   0.058  -0.071   0.155
##              income--tcm_science     0.021   0.070  -0.109   0.164
##           education--tcm_science    -0.075   0.068  -0.205   0.059
##           med_money--tcm_science    -0.040   0.074  -0.180   0.109
##             disease--tcm_science    -0.091   0.079  -0.244   0.067
##       tcm_frequency--tcm_science     0.009   0.094  -0.174   0.193
##                gender--tcm_trust    -0.017   0.086  -0.179   0.161
##                   age--tcm_trust     0.045   0.061  -0.071   0.169
##                income--tcm_trust     0.022   0.071  -0.120   0.160
##             education--tcm_trust    -0.027   0.084  -0.179   0.150
##             med_money--tcm_trust     0.065   0.078  -0.092   0.214
##               disease--tcm_trust     0.093   0.083  -0.065   0.262
##         tcm_frequency--tcm_trust     0.052   0.100  -0.146   0.250
##           tcm_science--tcm_trust     0.594   0.070   0.462   0.727
##                gender--tcm_money    -0.011   0.095  -0.198   0.175
##                   age--tcm_money     0.020   0.071  -0.121   0.156
##                income--tcm_money     0.160   0.079   0.006   0.319
##             education--tcm_money    -0.118   0.075  -0.269   0.027
##             med_money--tcm_money     0.223   0.078   0.063   0.371
##               disease--tcm_money     0.040   0.092  -0.143   0.216
##         tcm_frequency--tcm_money     0.620   0.058   0.498   0.722
##           tcm_science--tcm_money     0.125   0.091  -0.060   0.295
##             tcm_trust--tcm_money    -0.011   0.101  -0.209   0.186
##              gender--globuli_use    -0.194   0.140  -0.439   0.120
##                 age--globuli_use    -0.032   0.095  -0.212   0.161
##              income--globuli_use    -0.165   0.101  -0.364   0.032
##           education--globuli_use    -0.010   0.111  -0.208   0.233
##           med_money--globuli_use    -0.081   0.104  -0.288   0.119
##             disease--globuli_use    -0.028   0.130  -0.266   0.246
##       tcm_frequency--globuli_use    -0.230   0.126  -0.474   0.016
##         tcm_science--globuli_use     0.207   0.116  -0.025   0.429
##           tcm_trust--globuli_use    -0.079   0.130  -0.328   0.179
##           tcm_money--globuli_use     0.246   0.126  -0.011   0.490
##             gender--globuli_pass    -0.123   0.127  -0.368   0.132
##                age--globuli_pass    -0.128   0.091  -0.304   0.053
##             income--globuli_pass     0.250   0.093   0.066   0.431
##          education--globuli_pass    -0.060   0.102  -0.262   0.137
##          med_money--globuli_pass     0.182   0.099  -0.018   0.371
##            disease--globuli_pass    -0.126   0.118  -0.360   0.103
##      tcm_frequency--globuli_pass     0.280   0.120   0.044   0.511
##        tcm_science--globuli_pass    -0.098   0.117  -0.330   0.130
##          tcm_trust--globuli_pass     0.157   0.121  -0.085   0.391
##          tcm_money--globuli_pass    -0.145   0.126  -0.402   0.097
##        globuli_use--globuli_pass     0.701   0.072   0.560   0.841
##                  gender--vax_use     0.008   0.127  -0.222   0.286
##                     age--vax_use    -0.151   0.072  -0.290  -0.007
##                  income--vax_use    -0.040   0.100  -0.242   0.153
##               education--vax_use    -0.078   0.099  -0.262   0.128
##               med_money--vax_use     0.193   0.079   0.031   0.341
##                 disease--vax_use    -0.077   0.105  -0.278   0.140
##           tcm_frequency--vax_use     0.028   0.108  -0.188   0.234
##             tcm_science--vax_use    -0.260   0.088  -0.428  -0.082
##               tcm_trust--vax_use     0.103   0.127  -0.109   0.385
##               tcm_money--vax_use    -0.009   0.106  -0.216   0.202
##             globuli_use--vax_use    -0.055   0.160  -0.348   0.291
##            globuli_pass--vax_use    -0.081   0.147  -0.367   0.211
##                gender--vax_fresh    -0.080   0.109  -0.298   0.130
##                   age--vax_fresh     0.020   0.070  -0.115   0.159
##                income--vax_fresh     0.141   0.090  -0.034   0.322
##             education--vax_fresh     0.105   0.081  -0.056   0.262
##             med_money--vax_fresh    -0.083   0.074  -0.226   0.065
##               disease--vax_fresh     0.055   0.096  -0.138   0.239
##         tcm_frequency--vax_fresh    -0.059   0.099  -0.250   0.140
##           tcm_science--vax_fresh     0.184   0.086   0.013   0.349
##             tcm_trust--vax_fresh    -0.082   0.094  -0.280   0.092
##             tcm_money--vax_fresh     0.018   0.099  -0.177   0.213
##           globuli_use--vax_fresh     0.031   0.137  -0.247   0.296
##          globuli_pass--vax_fresh     0.010   0.136  -0.261   0.275
##               vax_use--vax_fresh     0.849   0.032   0.796   0.895
##         gender--needle_frequency    -0.146   0.092  -0.324   0.035
##            age--needle_frequency    -0.107   0.069  -0.245   0.026
##         income--needle_frequency     0.108   0.082  -0.052   0.268
##      education--needle_frequency     0.051   0.075  -0.094   0.201
##      med_money--needle_frequency     0.102   0.082  -0.059   0.263
##        disease--needle_frequency    -0.020   0.092  -0.203   0.158
##  tcm_frequency--needle_frequency     0.421   0.076   0.273   0.572
##    tcm_science--needle_frequency    -0.017   0.087  -0.193   0.149
##      tcm_trust--needle_frequency     0.000   0.097  -0.186   0.193
##      tcm_money--needle_frequency     0.256   0.084   0.084   0.411
##    globuli_use--needle_frequency     0.063   0.135  -0.188   0.346
##   globuli_pass--needle_frequency    -0.216   0.124  -0.465   0.026
##        vax_use--needle_frequency    -0.199   0.108  -0.404   0.015
##      vax_fresh--needle_frequency     0.165   0.103  -0.035   0.363
## ---

We visualize how much variance is explained for each variable by all variables combined.

pred <- res[1, ]$pred

errors_post2 <- lapply(seq(15), function(i) {
  preds <- res[, 2]
  variable <- lapply(preds, function(x) x$scores[[i]])
  do.call('c', variable)
})

pred$scores <- errors_post2
colnames(pred$Y) <- node_names

# pdf('Figures/Variance-Expained-ps.pdf', width = 8, height = 5)
plot(pred, type = 'ridgeline') +
  xlab(TeX('Bayesian $$R^2$$')) +
  ggtitle('Variance explained (post-stratified)') +
  ylab('') +
  guides(fill = FALSE) +
  scale_x_continuous(limits = c(0, 0.80), breaks = seq(0, 0.80, .2)) +
  custom_theme +
  theme(axis.text.x = element_text(angle = 0))

# dev.off()

Below are the posterior means and posterior standard deviations.

pred
## BGGM: Bayesian Gaussian Graphical Models 
## --- 
## Metric: Bayes R2
## Type: mixed 
## --- 
## Estimates:
## 
##                              Node Post.mean Post.sd Cred.lb Cred.ub
##                            Gender     0.313   0.049   0.221   0.414
##                               Age     0.442   0.037   0.366   0.511
##                            Income     0.419   0.041   0.337   0.498
##                         Education     0.330   0.050   0.237   0.432
##                  Medical expenses     0.260   0.049   0.168   0.358
##                   Chronic disease     0.242   0.062   0.132   0.372
##               TCM usage frequency     0.529   0.039   0.456   0.607
##  Perception of scientific support     0.549   0.038   0.473   0.621
##        Trust in TCM-certified MDs     0.370   0.034   0.305   0.436
##                      TCM expenses     0.666   0.030   0.607   0.725
##                  Homeopathy usage     0.414   0.034   0.346   0.481
##            Homeopathy propagation     0.556   0.028   0.500   0.611
##                 Vaccination usage     0.493   0.033   0.422   0.553
##               Booster vaccination     0.562   0.025   0.514   0.613
##       Acupuncture usage frequency     0.645   0.033   0.580   0.711